# coalesce is another way to change an RDD's partition count. It has a safety valve:
# shuffle=True must be set explicitly, otherwise the partition count cannot increase.
#coding:utf8
from pyspark import SparkContext,SparkConf

if __name__ == '__main__':
    # Local Spark context for the demo.
    spark_conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=spark_conf)

    pairs = [("a", 1), ("b", 2), ("c", 3), ("a", 2), ("b", 3), ("c", 4)]
    rdd = sc.parallelize(pairs, 3)  # start with 3 partitions

    # Shrinking works without a shuffle: everything lands in 1 partition.
    print(rdd.coalesce(numPartitions=1).glom().collect())
    # Growing WITHOUT shuffle=True is silently ignored — still 3 partitions.
    print(rdd.coalesce(numPartitions=5).glom().collect())
    # With shuffle=True the partition count can actually grow to 5.
    print(rdd.coalesce(numPartitions=5, shuffle=True).glom().collect())